#Librerias
library(IRdisplay)
library(formattable)
library(ggplot2)
library(cowplot)
library(dplyr)
library(stringr)
| Resistencia | |||||
|---|---|---|---|---|---|
| Porcentaje de algodón | 1 | 2 | 3 | 4 | 5 |
| 15 | 7 | 7 | 15 | 11 | 9 |
| 20 | 12 | 17 | 12 | 18 | 18 |
| 25 | 14 | 18 | 18 | 19 | 19 |
| 30 | 19 | 25 | 22 | 19 | 23 |
| 35 | 7 | 10 | 11 | 15 | 11 |
data_1 <- read.csv("./TP4_tables/data1.csv") # Leo los datos desde archivo .csv
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_1, aes(x=cotton_percentage, y=tensile_strength, fill=cotton_percentage)) +
labs(
title="Diagramas de cajas para los distintos porcentajes de algodón",
x="Porcentaje de algodón",
y="Tensión de ruptura",
fill="Niveles") +
geom_boxplot(alpha=0.5) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_1.aov <- aov(tensile_strength ~ cotton_percentage, data_1)
aov_test <- summary(data_1.aov)[[1]]
display_markdown('#### **ANOVA de un sentido:**')
aov_test <- cbind(c('Porcentaje de algodón', 'Residuos'), aov_test)
colnames(aov_test)[1] <- 'Source'
rownames(aov_test) <- c()
table <- formattable(aov_test, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
data_1.tukey <- as.data.frame(TukeyHSD(data_1.aov,ordered = TRUE, conf.level = 0.95)[1]$cotton_percentage)
data_1.tukey$names <- c(rownames(data_1.tukey))
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_1.tukey, aes(names, diff)) +
labs(
title="Intervalos de confianza de 95% para la diferencia de medias entre tratamientos",
x="",
y="",
col="Diferencia") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(lwr*upr > 0,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('Significativa','No significativa'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
smoothed <- data.frame(with(data_1.aov, lowess(x = data_1.aov$fitted, y = data_1.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_1.aov) +
geom_point(aes(x=data_1.aov$fitted, y=data_1.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_1.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
| Resistencia a la compresión | ||||
|---|---|---|---|---|
| Técnica de mezclado | 1 | 2 | 3 | 4 |
| 1 | 3129 | 3200 | 2800 | 2600 |
| 2 | 3000 | 3300 | 2900 | 2700 |
| 3 | 2865 | 2975 | 2985 | 2600 |
| 4 | 2890 | 3150 | 3050 | 2765 |
data_2 <- read.csv("./TP4_tables/data2.csv") # Leo los datos desde archivo .csv
data_2$mixing_method <- as.factor(data_2$mixing_method)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_2, aes(x=mixing_method, y=compression_strength, fill=mixing_method)) +
labs(
title="Diagramas de cajas para los distintos métodos de mezclado",
x="Método de mezclado",
y="Resistencia a la compresión",
fill="Niveles") +
geom_boxplot(alpha=0.5) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_2.levels <- split(data_2 , f=data_2$mixing_method)
# Cálculo de MSTr
grand_mean <- mean(data_2$compression_strength) # media general
treatment_means <- sapply(data_2.levels, function(x) {
mean(x$compression_strength)
}) # media de cada tratamiento
J <- sapply(data_2.levels, nrow) # cantidad de observaciones para cada tratamiento
SSTr <- sum(J * (treatment_means - grand_mean)^2)
I <- length(data_2.levels) # cantidad de niveles
MSTr <- SSTr / (I - 1)
display_markdown(sprintf('$MSTr = %.f$', MSTr))
$MSTr = 11460$
# Cálculo de MSE
samples <- t(sapply(data_2.levels, function(x) {x$compression_strength})) # matriz con todas las observaciones
residuals <- samples - treatment_means #residuos
SSE <- sum(residuals^2)
N <- sum(J) # cantidad de observaciones
MSE <- SSE / (N - I)
display_markdown(sprintf('$MSE = %.f$', MSE))
$MSE = 50772$
F <- MSTr / MSE
display_markdown(sprintf('$F = %.4f$', F))
$F = 0.2257$
alpha <- 0.05
f_alpha <- qf(alpha, df1=I-1, df2=N-I, lower=FALSE)
display_markdown(sprintf('$f_{\\alpha=%.3f,\\: %.f,\\: %.f} = %.4f$', alpha, I-1, N-I, f_alpha))
$f_{\alpha=0.050,\: 3,\: 12} = 3.4903$
p_value <- pf(F, df1=I-1, df2=N-I, lower=FALSE)
display_markdown(sprintf('$\\text{p-valor} = %.4f$', p_value))
$\text{p-valor} = 0.8767$
data_2.aov <- aov(compression_strength ~ mixing_method, data_2)
data_2.tukey <- as.data.frame(TukeyHSD(data_2.aov, ordered = TRUE, conf.level = 0.95)[1]$mixing_method)
data_2.tukey$names <- c(rownames(data_2.tukey))
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_2.tukey, aes(names, diff)) +
labs(
title="Intervalos de confianza de 95% para la diferencia de medias entre tratamientos",
x="",
y="",
col="Diferencia") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(lwr*upr > 0,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('Significativa','No significativa'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
smoothed <- data.frame(with(data_2.aov, lowess(x = data_2.aov$fitted, y = data_2.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_2.aov) +
geom_point(aes(x=data_2.aov$fitted, y=data_2.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_2.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
| Conductividad | ||||
|---|---|---|---|---|
| Recubrimiento | 1 | 2 | 3 | 4 |
| 1 | 143 | 141 | 150 | 146 |
| 2 | 152 | 149 | 137 | 143 |
| 3 | 134 | 133 | 132 | 127 |
| 4 | 147 | 148 | 144 | 142 |
data_3 <- read.csv("./TP4_tables/data3.csv") # Leo los datos desde archivo .csv
data_3$protection <- factor(data_3$protection)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_3, aes(x=protection, y=conductivity, fill=protection)) +
labs(
title="Diagramas de cajas para los distintos recubrimientos",
x="Recubrimiento",
y="Conductividad",
fill="Niveles") +
geom_boxplot(alpha=0.5) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_3.aov <- aov(conductivity ~ protection, data_3)
aov_test3 <- summary(data_3.aov)[[1]]
display_markdown('#### **ANOVA de un sentido:**')
aov_test3 <- cbind(c('Porcentaje de algodón', 'Residuos'), aov_test3)
colnames(aov_test3)[1] <- 'Source'
rownames(aov_test3) <- c()
table <- formattable(aov_test3, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
smoothed <- data.frame(with(data_3.aov, lowess(x = data_3.aov$fitted, y = data_3.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_3.aov) +
geom_point(aes(x=data_3.aov$fitted, y=data_3.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_3.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
data_3.levels <- split(data_3 , f=data_3$protection)
sample_mean <- mean(data_3.levels[[1]]$conductivity) # media muestral para el recubrimiento tipo 1
display_markdown(sprintf('$\\overline{X}_1 = %.f$', sample_mean))
$\overline{X}_1 = 145$
J <- sapply(data_3.levels, nrow) # cantidad de observaciones para cada tratamiento
I <- length(data_3.levels) # cantidad de niveles
N <- sum(J) # número total de observaciones
samples <- t(sapply(data_3.levels, function(x) {x$conductivity})) # matriz con todas las observaciones
treatment_means <- sapply(data_3.levels, function(x) {
mean(x$conductivity)
}) # media de cada tratamiento
residuals <- samples - treatment_means #residuos
SSE <- sum(residuals^2)
MSE <- SSE / (N - I)
display_markdown(sprintf('$MSE = %.4f$', MSE))
$MSE = 19.2083$
alpha <- 0.05
t <- qt(alpha/2, N-I, lower=FALSE) # distribución t de Student con alpha=0.05/2 y N-I grados de libertad
aux <- t * sqrt(MSE / J[1])
conf_int <- c(sample_mean - aux, sample_mean + aux) # intervalo de confianza
conf_int.df <- as.data.frame(cbind('Tratamiento 1', round(conf_int[1], 4), sample_mean, round(conf_int[2], 4)))
colnames(conf_int.df) <- c(' ', '2.5%', 'Media estimada', '97.5%')
display_markdown('#### **Intervalo de confianza del $\\textbf{95%}$ para la estimación de la media del recubrimiento tipo 1:**')
rownames(conf_int.df) <- c()
as.htmlwidget(formattable(conf_int.df, align='c', list(' ' = formatter("span",style = ~ style(
'font-weight'='bold', 'text-align'='left')))), width="50%")
x_bar_1 <- mean(data_3.levels[[1]]$conductivity) # media muestral para el recubrimiento tipo 1
x_bar_4 <- mean(data_3.levels[[4]]$conductivity) # media muestral para el recubrimiento tipo 4
x_bar_diff <- x_bar_1 - x_bar_4
display_markdown(sprintf('$\\overline{X}_1 - \\overline{X}_4 = %.2f$', x_bar_diff))
$\overline{X}_1 - \overline{X}_4 = -0.25$
alpha <- 0.01
t <- qt(alpha/2, N-I, lower=FALSE) # distribución t de Student con alpha=0.05/2 y N-I grados de libertad
aux <- t * sqrt(MSE / J[1] + MSE / J[4])
conf_int <- c(x_bar_diff - aux, x_bar_diff + aux) # intervalo de confianza
conf_int.df <- as.data.frame(cbind('μ1 - μ4', round(conf_int[1], 4), x_bar_diff, round(conf_int[2], 4)))
colnames(conf_int.df) <- c(' ', '0.5%', 'Valor estimado', '99.5%')
display_markdown('#### **Intervalo de confianza del $\\textbf{99%}$ para la estimación de $\\mu_1 - \\mu_4$:**')
rownames(conf_int.df) <- c()
as.htmlwidget(formattable(conf_int.df, align='c', list(' ' = formatter("span",style = ~ style(
'font-weight'='bold', 'text-align'='left')))), width="50%")
| Tiempo de respuesta (ms) | |||||
|---|---|---|---|---|---|
| Tipo de circuito | 1 | 2 | 3 | 4 | 5 |
| 1 | 19 | 22 | 20 | 18 | 25 |
| 2 | 20 | 21 | 33 | 27 | 40 |
| 3 | 16 | 15 | 18 | 26 | 17 |
data_4 <- read.csv("./TP4_tables/data4.csv") # Leo los datos desde archivo .csv
data_4$circuit <- factor(data_4$circuit)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_4, aes(x=circuit, y=response_time, fill=circuit)) +
labs(
title="Diagramas de cajas para los distintos tipos de circuitos",
x="Circuito",
y="Tiempo de respuesta (ms)",
fill="Niveles") +
geom_boxplot(alpha=0.5) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_4.aov <- aov(response_time ~ circuit, data_4)
aov_test4 <- summary(data_4.aov)[[1]]
display_markdown('#### **ANOVA de un sentido:**')
aov_test4 <- cbind(c('Porcentaje de algodón', 'Residuos'), aov_test4)
colnames(aov_test4)[1] <- 'Source'
rownames(aov_test4) <- c()
table <- formattable(aov_test4, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
smoothed <- data.frame(with(data_4.aov, lowess(x = data_4.aov$fitted, y = data_4.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_4.aov) +
geom_point(aes(x=data_4.aov$fitted, y=data_4.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_4.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
data_4.levels <- split(data_4 , f=data_4$circuit)
x_bar_3 <- mean(data_4.levels[[3]]$response_time) # media muestral para el recubrimiento tipo 4
display_markdown(sprintf('$\\overline{X}_3 = %.2f$', x_bar_3))
$\overline{X}_3 = 18.40$
J <- sapply(data_4.levels, nrow) # cantidad de observaciones para cada tratamiento
I <- length(data_4.levels) # cantidad de niveles
N <- sum(J) # número total de observaciones
samples <- t(sapply(data_4.levels, function(x) {x$response_time})) # matriz con todas las observaciones
treatment_means <- sapply(data_4.levels, function(x) {
mean(x$response_time)
}) # media de cada tratamiento
residuals <- samples - treatment_means #residuos
SSE <- sum(residuals^2)
MSE <- SSE / (N - I)
display_markdown(sprintf('$MSE = %.4f$', MSE))
$MSE = 32.5667$
alpha <- 0.05
t <- qt(alpha/2, N-I, lower=FALSE) # distribución t de Student con alpha=0.05/2 y N-I grados de libertad
aux <- t * sqrt(MSE / J[1])
conf_int <- c(x_bar_3 - aux, x_bar_3 + aux) # intervalo de confianza
conf_int.df <- as.data.frame(cbind('Tratamiento 3', round(conf_int[1], 4), sample_mean, round(conf_int[2], 4)))
colnames(conf_int.df) <- c(' ', '2.5%', 'Media estimada', '97.5%')
display_markdown('#### **Intervalo de confianza del $\\textbf{95%}$ para el tiempo de respuesta del tercer cicuito:**')
rownames(conf_int.df) <- c()
as.htmlwidget(formattable(conf_int.df, align='c', list(' ' = formatter("span",style = ~ style(
'font-weight'='bold', 'text-align'='left')))), width="50%")
| Temperatura (ºC) | Volumen (cc) | ||||
|---|---|---|---|---|---|
| 70.0 | 1245 | 1235 | 1285 | 1245 | 1235 |
| 75.0 | 1235 | 1240 | 1200 | 1220 | 1210 |
| 80.0 | 1225 | 1200 | 1170 | 1155 | 1095 |
data_5 <- read.csv("./TP4_tables/data5.csv") # Leo los datos desde archivo .csv
data_5$temperature <- factor(data_5$temperature)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_5, aes(x=temperature, y=volume, fill=temperature)) +
labs(
title="Diagramas de cajas para las distintas temperaturas",
x="Temperatura de secado (ºC)",
y="Volumen (cc)",
fill="Niveles") +
geom_boxplot(alpha=0.5) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_5.levels <- split(data_5 , f=data_5$temperature)
model_5 <- lm(volume ~ temperature, data_5)
# Cálculo de MSTr
J <- sapply(data_5.levels, nrow) # cantidad de observaciones para cada tratamiento
I <- length(data_5.levels) # cantidad de niveles
SSTr <- sum((predict(model_5) - mean(data_5$volume))^2)
MSTr <- SSTr / (I - 1)
display_markdown(sprintf('$MSTr = %.2f$', MSTr))
$MSTr = 8240.00$
# Cálculo de MSE
N <- sum(J) # cantidad total de observaciones
SSE <- sum(model_5$residuals^2)
MSE <- SSE / (N - I)
display_markdown(sprintf('$MSE = %.2f$', MSE))
$MSE = 1050.83$
F <- MSTr / MSE
display_markdown(sprintf('$F = %.4f$', F))
$F = 7.8414$
alpha <- 0.01
f_alpha <- qf(alpha, df1=I-1, df2=N-I, lower=FALSE)
display_markdown(sprintf('$f_{\\alpha=%.3f,\\: %.f,\\: %.f} = %.4f$', alpha, I-1, N-I, f_alpha))
$f_{\alpha=0.010,\: 2,\: 12} = 6.9266$
p_value <- pf(F, df1=I-1, df2=N-I, lower=FALSE)
display_markdown(sprintf('$\\text{p-valor} = %.4f$', p_value))
$\text{p-valor} = 0.0066$
data_5.aov <- aov(volume ~ temperature, data_5)
data_5.tukey <- as.data.frame(TukeyHSD(data_5.aov, ordered = TRUE, conf.level = 0.95)[1]$temperature)
data_5.tukey$names <- c(rownames(data_5.tukey))
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_5.tukey, aes(names, diff)) +
labs(
title="Intervalos de confianza de 95% para la diferencia de medias entre tratamientos",
x="",
y="",
col="Diferencia") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(lwr*upr > 0,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('Significativa','No significativa'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
smoothed <- data.frame(with(data_5.aov, lowess(x = data_5.aov$fitted, y = data_5.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_5.aov) +
geom_point(aes(x=data_5.aov$fitted, y=data_5.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_5.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
## TODO analizar los graficos de arriba
| Tipo de punta | Espécimen | |||
|---|---|---|---|---|
| 1 | 2 | 3 | 4 | |
| 1 | 9.3 | 9.4 | 9.6 | 10.0 |
| 2 | 9.4 | 9.3 | 9.8 | 9.9 |
| 3 | 9.2 | 9.4 | 9.5 | 9.7 |
| 4 | 9.7 | 9.6 | 10.0 | 10.2 |
data_6 <- read.csv("./TP4_tables/data6.csv") # Leo los datos desde archivo .csv
data_6$indenter <- factor(data_6$indenter)
data_6$specimen <- factor(data_6$specimen)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_6, aes(x=indenter, y=hardness, fill=indenter)) +
labs(
title="Diagramas de cajas para los distintos tipos de punta",
x="Tipo de punta",
y="Dureza",
fill="Tipo de punta",
col="Espécimen") +
geom_boxplot(alpha=0.5, aes(fill=indenter)) +
geom_point(aes(col=specimen), size=4) +
geom_point(size=4, shape=1) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5)) +
guides(fill = guide_legend(override.aes = list(shape = NA), order = 1))
data_6.aov <- aov(hardness ~ indenter + specimen, data_6)
aov_test6 <- summary(data_6.aov)[[1]]
display_markdown('#### **ANOVA de dos sentidos: Dureza vs Tipo de punta + Espécimen**')
display_markdown('\n')
aov_test6 <- cbind(c('Tipo de punta', 'Espécimen', 'Residuos'), aov_test6)
colnames(aov_test6)[1] <- 'Source'
rownames(aov_test6) <- c()
table <- formattable(aov_test6, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
data_6.tukey <- as.data.frame(TukeyHSD(data_6.aov, ordered = TRUE, conf.level = 0.95)[1]$indenter)
data_6.tukey$names <- c(rownames(data_6.tukey))
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_6.tukey, aes(names, diff)) +
labs(
title="Intervalos de confianza de 95% para la diferencia de medias entre tratamientos",
x="",
y="",
col="Diferencia") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(lwr*upr > 0,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('Significativa','No significativa'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
smoothed <- data.frame(with(data_6.aov, lowess(x = data_6.aov$fitted, y = data_6.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_6.aov) +
geom_point(aes(x=data_6.aov$fitted, y=data_6.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_6.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
## TODO analizar los graficos de arriba
| Algoritmo | Proyecto | |||||||
|---|---|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | |
| 1 | 1244 | 21 | 82 | 2221 | 905 | 839 | 527 | 122 |
| 2 | 281 | 129 | 396 | 1306 | 336 | 910 | 473 | 199 |
| 3 | 220 | 84 | 458 | 543 | 300 | 794 | 488 | 142 |
| 4 | 225 | 83 | 425 | 552 | 291 | 826 | 509 | 153 |
| 5 | 19 | 11 | -34 | 121 | 15 | 103 | 87 | -17 |
| 6 | -20 | 35 | -53 | 170 | 104 | 199 | 142 | 41 |
data_7 <- read.csv("./TP4_tables/data7.csv") # Leo los datos desde archivo .csv
data_7$algorithm <- factor(data_7$algorithm)
data_7$project <- factor(data_7$project)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_7, aes(x=algorithm, y=estimation_error, fill=algorithm)) +
labs(
title="Diagramas de cajas para los distintos algoritmos",
x="Algoritmo",
y="Error en la estimación del costo promedio",
fill="Algoritmo",
col="Proyecto") +
geom_boxplot(alpha=0.5, aes(fill=algorithm)) +
#geom_point(aes(col=project), size=4, alpha=0.5) +
#geom_point(shape=1, size=4) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5)) +
guides(fill = guide_legend(override.aes = list(shape = NA), order = 1))
data_7.aov <- aov(estimation_error ~ algorithm + project, data_7)
aov_test7 <- summary(data_7.aov)[[1]]
display_markdown('#### **ANOVA de dos sentidos: Error de estimación vs Algoritmo + Proyecto**')
display_markdown('\n')
aov_test7 <- cbind(c('Algoritmo', 'Proyecto', 'Residuos'), aov_test7)
colnames(aov_test7)[1] <- 'Source'
rownames(aov_test7) <- c()
table <- formattable(aov_test7, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
smoothed <- data.frame(with(data_7.aov, lowess(x = data_7.aov$fitted, y = data_7.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_7.aov) +
geom_point(aes(x=data_7.aov$fitted, y=data_7.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_7.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
data_7.tukey <- as.data.frame(TukeyHSD(data_7.aov, ordered = TRUE, conf.level = 0.95)[1]$algorithm)
data_7.tukey$names <- c(rownames(data_7.tukey))
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_7.tukey, aes(names, diff)) +
labs(
title="Intervalos de confianza de 95% para la diferencia de medias entre tratamientos",
x="",
y="",
col="Diferencia") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(lwr*upr > 0,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('Significativa','No significativa'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_7.levels <- split(data_7 , f=data_7$algorithm)
x_bar <- sapply(data_7.levels, function(x) {
mean(x$estimation_error)
}) # media de cada tratamiento
# Cálculo de MSE
model_7 <- lm(estimation_error ~ algorithm + project, data_7)
J <- sapply(data_7.levels, nrow) # cantidad de observaciones para cada tratamiento
I <- length(data_7.levels) # cantidad de niveles
N <- sum(J) # cantidad total de observaciones
SSE <- sum(model_7$residuals^2)
MSE <- SSE / (N - I)
display_markdown(sprintf('$MSE = %.2f$', MSE))
$MSE = 75602.14$
alpha <- 0.05
t <- qt(alpha/2, N-I, lower=FALSE) # distribución t de Student con alpha=0.05/2 y N-I grados de libertad
aux <- t * sqrt(MSE / J)
conf_int <- matrix(c(x_bar - aux, x_bar + aux), ncol=2, byrow=FALSE) # intervalos de confianza
conf_int.df <- as.data.frame(data.frame(paste("Algoritmo", 1:6, sep=" "), conf_int[,1], x_bar, conf_int[,2]))
colnames(conf_int.df) = c(" ", "2.5%", "Media estimada", "97.5%")
contains_zero <- conf_int.df[,"2.5%"] * conf_int.df[,"97.5%"] <= 0 # TRUE si el intervalo contiene al cero
conf_int.df <- cbind(conf_int.df, contains_zero)
colnames(conf_int.df)[5] <- "Contiene al 0"
display_markdown('#### **Intervalos de confianza para medias de tratamiento**')
display_markdown('\n')
table <- formattable(conf_int.df, align=c('l', 'c', 'c', 'c', 'c'), list(`Contiene al 0` = formatter("span",
x ~ ifelse(x, "✔ Sí", "✘ No"),
style = x ~ style(color = ifelse(x, "green", "red"))), `Media estimada` = formatter("span", style = ~ style("font.weight" = "bold")), ` ` = formatter("span", style = ~ style("font.weight" = "bold"))))
#table <- format_table(table, list(area(1:2) ~ color_tile("transparent", "lightgray")))
as.htmlwidget(table, width="50%", height=NULL)
colnames(conf_int.df) <- c("algorithm", "lwr", "mean", "upr", "contains_zero")
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(conf_int.df, aes(algorithm)) +
labs(
title="Intervalos de confianza para medias de tratamiento",
x="",
y="",
col="Contiene al cero") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(contains_zero==TRUE,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('✔ Sí','✘ No'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
| Material | Temperatura (ºF) | |||||
|---|---|---|---|---|---|---|
| baja | media | alta | ||||
| 1 | 130 | 155 | 34 | 40 | 20 | 70 |
| 74 | 180 | 80 | 75 | 82 | 58 | |
| 2 | 150 | 188 | 136 | 122 | 25 | 70 |
| 159 | 126 | 106 | 115 | 58 | 45 | |
| 3 | 138 | 110 | 174 | 120 | 96 | 104 |
| 168 | 160 | 150 | 139 | 82 | 60 | |
data_8 <- read.csv("./TP4_tables/data8.csv") # Leo los datos desde archivo .csv
data_8$material <- factor(data_8$material)
data_8$temperature <- factor(data_8$temperature)
options(dplyr.summarise.inform = FALSE)
data_8.mean <- data_8 %>%
group_by(material, temperature) %>%
summarise(cell_mean=mean(lifespan)) # media de cada tratamiento
data_8.mean <- data_8.mean %>%
group_by(material) %>%
summarise(material_mean=mean(cell_mean), across()) # media de cada material
data_8.mean <- data_8.mean %>%
group_by(temperature) %>%
summarise(temperature_mean=mean(cell_mean), across()) # media de cada nivel de temperatura
data_8.mean <- data_8.mean %>%
arrange(match(temperature, c("low", "mid", "high"))) %>%
arrange(match(material, c(1, 2, 3))) %>%
select(material, temperature, cell_mean, material_mean, temperature_mean)
eng_colnames <- colnames(data_8.mean)
colnames(data_8.mean) <- c("material", "temperatura", "media de la celda", "media de la fila", "media de la columna")
as.htmlwidget(formattable(data_8.mean, align="c"), width="80%", height=NULL)
data_8.aov <- summary(aov(lifespan ~ material * temperature, data_8))[[1]]
display_markdown('#### **ANOVA de dos sentidos**')
display_markdown('\n')
data_8.aov <- cbind(c('Material', 'Temperatura', 'Interacción', 'Residuos'), data_8.aov)
colnames(data_8.aov)[1] <- 'Source'
rownames(data_8.aov) <- c()
data_8.aov["Pr(>F)"] <- round(data_8.aov["Pr(>F)"], 4)
table <- formattable(data_8.aov, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
# Plot
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_7, aes(x=algorithm, y=estimation_error, fill=algorithm)) +
labs(
title="Diagramas de cajas para los distintos algoritmos",
x="Algoritmo",
y="Error en la estimación del costo promedio",
fill="Algoritmo",
col="Proyecto") +
geom_boxplot(alpha=0.5, aes(fill=algorithm)) +
#geom_point(aes(col=project), size=4, alpha=0.5) +
#geom_point(shape=1, size=4) +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5)) +
guides(fill = guide_legend(override.aes = list(shape = NA), order = 1))
data_7.aov <- aov(estimation_error ~ algorithm + project, data_7)
aov_test7 <- summary(data_7.aov)[[1]]
display_markdown('#### **ANOVA de dos sentidos: Error de estimación vs Algoritmo + Proyecto**')
display_markdown('\n')
aov_test7 <- cbind(c('Algoritmo', 'Proyecto', 'Residuos'), aov_test7)
colnames(aov_test7)[1] <- 'Source'
rownames(aov_test7) <- c()
table <- formattable(aov_test7, align=c('l', 'c', 'c', 'c', 'c', 'c'), list(`Source` = formatter("span",style = ~ style('text-align'='left'))))
as.htmlwidget(table, width="70%", height=NULL)
smoothed <- data.frame(with(data_7.aov, lowess(x = data_7.aov$fitted, y = data_7.aov$residuals)))
# Gráficos
options(repr.plot.width=14, repr.plot.height=8)
res_vs_fit <- ggplot(data_7.aov) +
geom_point(aes(x=data_7.aov$fitted, y=data_7.aov$residuals), color= '#ff9696', size=3) +
geom_path(data = smoothed, aes(x = x, y = y), col="#a399ff", size=1) +
geom_hline(linetype = 2, yintercept=0, alpha=0.2) +
ggtitle("Residuals vs Fitted") +
xlab("Fitted") +
ylab("Residuals") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
qq_plot <- ggplot(data_7.aov) +
stat_qq(aes(sample = .stdresid), color= '#ff9696', size=3) +
geom_abline(col="#a399ff", size=1) +
xlab("Theoretical Quantiles") +
ylab("Standardized Residuals") +
ggtitle("Normal Q-Q") +
theme_light() +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
plot_grid(res_vs_fit, qq_plot, ncol = 2)
data_7.tukey <- as.data.frame(TukeyHSD(data_7.aov, ordered = TRUE, conf.level = 0.95)[1]$algorithm)
data_7.tukey$names <- c(rownames(data_7.tukey))
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(data_7.tukey, aes(names, diff)) +
labs(
title="Intervalos de confianza de 95% para la diferencia de medias entre tratamientos",
x="",
y="",
col="Diferencia") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(lwr*upr > 0,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('Significativa','No significativa'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))
data_7.levels <- split(data_7 , f=data_7$algorithm)
x_bar <- sapply(data_7.levels, function(x) {
mean(x$estimation_error)
}) # media de cada tratamiento
# Cálculo de MSE
model_7 <- lm(estimation_error ~ algorithm + project, data_7)
J <- sapply(data_7.levels, nrow) # cantidad de observaciones para cada tratamiento
I <- length(data_7.levels) # cantidad de niveles
N <- sum(J) # cantidad total de observaciones
SSE <- sum(model_7$residuals^2)
MSE <- SSE / (N - I)
display_markdown(sprintf('$MSE = %.2f$', MSE))
$MSE = 75602.14$
alpha <- 0.05
t <- qt(alpha/2, N-I, lower=FALSE) # distribución t de Student con alpha=0.05/2 y N-I grados de libertad
aux <- t * sqrt(MSE / J)
conf_int <- matrix(c(x_bar - aux, x_bar + aux), ncol=2, byrow=FALSE) # intervalos de confianza
conf_int.df <- as.data.frame(data.frame(paste("Algoritmo", 1:6, sep=" "), conf_int[,1], x_bar, conf_int[,2]))
colnames(conf_int.df) = c(" ", "2.5%", "Media estimada", "97.5%")
contains_zero <- conf_int.df[,"2.5%"] * conf_int.df[,"97.5%"] <= 0 # TRUE si el intervalo contiene al cero
conf_int.df <- cbind(conf_int.df, contains_zero)
colnames(conf_int.df)[5] <- "Contiene al 0"
display_markdown('#### **ANOVA de dos sentidos: Error de estimación vs Algoritmo + Proyecto**')
display_markdown('\n')
table <- formattable(conf_int.df, align=c('l', 'c', 'c', 'c', 'c'), list(`Contiene al 0` = formatter("span",
x ~ ifelse(x, "✔ Sí", "✘ No"),
style = x ~ style(color = ifelse(x, "green", "red"))), `Media estimada` = formatter("span", style = ~ style("font.weight" = "bold")), ` ` = formatter("span", style = ~ style("font.weight" = "bold"))))
#table <- format_table(table, list(area(1:2) ~ color_tile("transparent", "lightgray")))
as.htmlwidget(table, width="50%", height=NULL)
colnames(conf_int.df) <- c("algorithm", "lwr", "mean", "upr", "contains_zero")
# Gráfico de los intervalos de confianza
options(repr.plot.width=14, repr.plot.height=8)
ggplot(conf_int.df, aes(algorithm)) +
labs(
title="Intervalos de confianza de 95% para las medias de los distintos tratamientos",
x="",
y="",
col="Contiene al cero") +
geom_errorbar(aes(ymin=lwr, ymax=upr, col=ifelse(contains_zero==TRUE,'1','2')), width = 0.4, alpha=1) +
scale_color_manual(values=c('#05b5f5','#f50505'), labels=c('✔ Sí','✘ No'), breaks=c('1','2')) +
geom_hline(yintercept=0, linetype="dashed", col="black") +
theme_light() +
coord_flip(expand = TRUE) +
theme(text=element_text(size=20),
plot.title = element_text(size=24, hjust = 0.5))